

* Please change the “path” to the working directory to run the code

global path "data"
global path_raw "data/raw"
global path_analysis "data/analysis_data"
global path_output "data/output_tables_figures"

/*----------------------------------------------------------------------------*/

// HMDA Data - Calculating Denial Rate//

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for mortgage denial rate at the tract-year level
* In the replication package, the actual analysis data "hmda_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	sort ctn
	forvalues i=1/5 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	
	gen denial_rate=a3/(a1+a2+a3) // tract-level denial rate
	gen denial_rate_robustness=a3/(a1+a2+a3+a4+a5) // tract-level denial rate, robustness check
	
	by ctn: keep if _n==1
	
	keep year ctn denial_rate denial_rate_robustness
		
	save "$path/temp_hmda_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_ctn_year_analysis.dta", replace

/*----------------------------------------------------------------------------*/

// Construct covariates from the HMDA dataset 

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	sort ctn
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=4
	
	gen male=0
	replace male=. if applicant_sex==3 | applicant_sex==4 // not provided
	replace male=1 if applicant_sex==1 & coapplicant_sex==5 // one applicant
	replace male=1 if applicant_sex==1 & coapplicant_sex==1 // two applicants, both male
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex==2 // two applicants, one male + one female
	replace male=0.5 if applicant_sex==2 & coapplicant_sex!=5 & coapplicant_sex==1 // two applicants, one female + one male
	
	gen white=0
	replace white=. if applicant_race1==6 | applicant_race1==7 // not provided
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8 // one applicant
	replace white=1 if applicant_race1==5 & coapplicant1_race1==5 // two applicants, both white
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1<5 // two applicants, one white + one non-white
	replace white=0.5 if applicant_race1<5 & coapplicant1_race1!=8 & coapplicant1_race1==5 // two applicants, one non-white + one white
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn white_mean male_mean income_mean loan_mean dti_mean
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_`yy'.dta", replace
}

clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

merge 1:1 ctn year using "$path_analysis/hmda_ctn_year_analysis.dta"
drop _merge

order ctn year
sort ctn year
save "$path_analysis/hmda_ctn_year_analysis.dta", replace

* In the replication package, the actual analysis data "hmda_ctn_year_analysis.dta" is included in /data/analysis_data/


***************************************************************************************************************************************************


// HMDA Data - Calculating Number of Origination and Originated Amount //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 3: Number of Origination and Originated Amount at the tract-year level
* In the replication package, the actual analysis data "hmda_origination_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/
	
	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	drop if action_type>=4
	
	bysort ctn: gen x=(action_type==1)
	by ctn: egen num_ori=sum(x)

	gen amount=x*loan_amount
	by ctn: egen amount_ori=sum(amount)
	replace amount_ori= amount_ori/1000 // in $M (the value is in $K in the raw data)
	
	drop x amount

	by ctn: keep if _n==1

	keep ctn year num_ori amount_ori

	save "$path/temp_hmda_`yy'.dta", replace
}

clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_origination_ctn_year_analysis.dta", replace

* In the replication package, the actual analysis data "hmda_origination_ctn_year_analysis.dta" is included in /data/analysis_data/


***************************************************************************************************************************************************


// HMDA Data - Calculating Number of Applications //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 5: Number of Applications at the tract-year level
* In the replication package, the actual analysis data "hmda_application_ctn_year_analysis.dta" is included


forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/
	
	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	drop if action_type>=6
	
	bysort ctn: gen N=_N
	
	destring population, replace force
	drop if population==.
	gen NumApp=N/population
	
	by ctn: keep if _n==1

	keep ctn year NumApp

	save "$path/temp_hmda_`yy'.dta", replace
}

clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_application_ctn_year_analysis.dta", replace

* In the replication package, the actual analysis data "hmda_application_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Probability of Each Denial Reason //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 7: Prob(Denial Reason) at the tract-year level
* In the replication package, the actual analysis data "hmda_denialreason_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/
	
	keep if action_type==3 // focus on denied applications
	
	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	destring denial_reason1, replace force
	destring denial_reason2, replace force
	destring denial_reason3, replace force	
	
	sort ctn
	
	forvalues i=1/9 {
		by ctn: egen r`i'=sum(denial_reason1==`i'|denial_reason2==`i'|denial_reason3==`i')
	}
	
	by ctn: gen tot_denial=_N // total number of denials
	
	drop if denial_reason1==. & denial_reason2==. & denial_reason3==.
	
	by ctn: keep if _n==1
	
	forvalues i=1/9 {
		gen prob`i'=r`i'/tot_denial
	}
	
	keep year ctn prob* 

	save "$path/temp_hmda_`yy'.dta", replace
}

clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_denialreason_ctn_year_analysis.dta", replace

* In the replication package, the actual analysis data "hmda_denialreason_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Denial Rate for the Subsample of Purchase Loans //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 8: denial rate (for purchase loans) at the tract-year level
* In the replication package, the actual analysis data "hmda_purchase_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 // In constrast with Table 2, here we keep home purchase only
	keep if property_type==1 // One-to-four family
	
	sort ctn
	forvalues i=1/8 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	by ctn: gen N=_N
	
	gen denial_rate=a3/(a1+a2+a3) // tract-level denial rate
	
	gen white=0
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1!=5
	
	gen male=0
	replace male=1 if applicant_sex==1 & coapplicant_sex==5
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex!=1
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn denial_rate white_mean male_mean income_mean loan_mean dti_mean N
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_purchase_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_purchase_`yy'.dta"
	erase "$path/temp_hmda_purchase_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_purchase_ctn_year_analysis.dta", replace

* In the replication package, the analysis data "hmda_purchase_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Denial Rate for the Subsample of Refinance Loans //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 8: denial rate (for refinance loans) at the tract-year level
* In the replication package, the actual analysis data "hmda_refinance_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==3 // In constrast with Table 2, here we keep refinance mortgages only
	keep if property_type==1 // One-to-four family
	
	sort ctn
	forvalues i=1/8 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	by ctn: gen N=_N
	
	gen denial_rate=a3/(a1+a2+a3) // tract-level denial rate
	
	gen white=0
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1!=5
	
	gen male=0
	replace male=1 if applicant_sex==1 & coapplicant_sex==5
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex!=1
	
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn denial_rate white_mean male_mean income_mean loan_mean dti_mean N
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_refinance_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_refinance_`yy'.dta"
	erase "$path/temp_hmda_refinance_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_refinance_ctn_year_analysis.dta", replace

* In the replication package, the analysis data "hmda_refinance_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Denial Rate for the Subsample of Home Improvement Loans //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 8: denial rate (for home improvement loans) at the tract-year level
* In the replication package, the actual analysis data "hmda_improvement_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==2 // In constrast with Table 2, here we keep home improvement mortgages only
	keep if property_type==1 // One-to-four family
	
	sort ctn
	forvalues i=1/8 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	
	gen denial_rate=a3/(a1+a2+a3+a4+a5) // tract-level denial rate
	
	gen white=0
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1!=5
	
	gen male=0
	replace male=1 if applicant_sex==1 & coapplicant_sex==5
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex!=1

	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn denial_rate white_mean male_mean income_mean loan_mean dti_mean
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_improvement_`yy'.dta", replace
}

clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_improvement_`yy'.dta"
	erase "$path/temp_hmda_improvement_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_improvement_ctn_year_analysis.dta", replace

* In the replication package, the analysis data "hmda_improvement_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Denial Rate for the Subsample of Owner-occupied Loans //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 9: denial rate (for owner-occupied loans) at the tract-year level
* In the replication package, the actual analysis data "hmda_owner_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinance
	keep if property_type==1 // One-to-four family
	
	keep if occupancy==1 // In contrast with Table 2, we only keep owner-occupied homes
	
	sort ctn
	forvalues i=1/8 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	
	gen denial_rate=a3/(a1+a2+a3) // tract-level denial rate
	
	gen male=0
	replace male=. if applicant_sex==3 | applicant_sex==4 // not provided
	replace male=1 if applicant_sex==1 & coapplicant_sex==5 // one applicant
	replace male=1 if applicant_sex==1 & coapplicant_sex==1 // two applicants, both male
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex==2 // two applicants, one male + one female
	replace male=0.5 if applicant_sex==2 & coapplicant_sex!=5 & coapplicant_sex==1 // two applicants, one female + one male
	
	gen white=0
	replace white=. if applicant_race1==6 | applicant_race1==7 // not provided
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8 // one applicant
	replace white=1 if applicant_race1==5 & coapplicant1_race1==5 // two applicants, both white
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1<5 // two applicants, one white + one non-white
	replace white=0.5 if applicant_race1<5 & coapplicant1_race1!=8 & coapplicant1_race1==5 // two applicants, one non-white + one white
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn denial_rate white_mean male_mean income_mean loan_mean dti_mean
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_owner_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_owner_`yy'.dta"
	erase "$path/temp_hmda_owner_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_owner_ctn_year_analysis.dta", replace

* In the replication package, the analysis data "hmda_owner_ctn_year_analysis.dta" is included in /data/analysis_data/

***************************************************************************************************************************************************


// HMDA Data - Calculating Denial Rate for the Subsample of Non-owner-occupied Loans //

* The raw data is publicly available (see README)
* Becasue of the data's large size, the replication package only includes a set of pseudo datasets
* The code below produces the analysis HMDA dataset for Table 9: denial rate (for non-owner-occupied loans) at the tract-year level
* In the replication package, the actual analysis data "hmda_nonowner_ctn_year_analysis.dta" is included

forvalues yy=2007/2016 {
	
	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/

	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinance
	keep if property_type==1 // One-to-four family
	
	keep if occupancy==2 // In contrast with Table 2, we only keep non-owner-occupied homes
	
	sort ctn
	forvalues i=1/8 {
		by ctn: egen a`i'=sum(action_type==`i')
	}
	
	/*
	action_type
	1 - Loan originated
	2 - Application approved but not accepted
	3 - Application denied by financial institution
	4 - Application withdrawn by applicant
	5 - File closed for incompleteness
	6 - Loan purchased by your institution
	7 - Preapproval request denied by financial institution
	8 - Preapproval request approved but not accepted 
	*/ 
	
	drop if action_type>=6
	
	gen denial_rate=a3/(a1+a2+a3) // tract-level denial rate
	
	gen male=0
	replace male=. if applicant_sex==3 | applicant_sex==4 // not provided
	replace male=1 if applicant_sex==1 & coapplicant_sex==5 // one applicant
	replace male=1 if applicant_sex==1 & coapplicant_sex==1 // two applicants, both male
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex==2 // two applicants, one male + one female
	replace male=0.5 if applicant_sex==2 & coapplicant_sex!=5 & coapplicant_sex==1 // two applicants, one female + one male
	
	gen white=0
	replace white=. if applicant_race1==6 | applicant_race1==7 // not provided
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8 // one applicant
	replace white=1 if applicant_race1==5 & coapplicant1_race1==5 // two applicants, both white
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1<5 // two applicants, one white + one non-white
	replace white=0.5 if applicant_race1<5 & coapplicant1_race1!=8 & coapplicant1_race1==5 // two applicants, one non-white + one white
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	by ctn: egen white_mean=mean(white)
	by ctn: egen male_mean=mean(male)
	by ctn: egen income_mean=mean(income)
	by ctn: egen loan_mean=mean(loan_amount)
	by ctn: egen dti_mean=mean(dti)
	by ctn: keep if _n==1
	
	keep year ctn denial_rate white_mean male_mean income_mean loan_mean dti_mean
	rename white_mean white
	rename male_mean male
	rename income_mean income
	rename loan_mean loan
	rename dti_mean dti
		
	save "$path/temp_hmda_nonowner_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_nonowner_`yy'.dta"
	erase "$path/temp_hmda_nonowner_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_analysis/hmda_nonowner_ctn_year_analysis.dta", replace

* In the replication package, the analysis data "hmda_nonowner_ctn_year_analysis.dta" is included in /data/analysis_data/
***************************************************************************************************************************************************


* Data for loan-level regression in Tables 6 and 10

* The raw data is publicly available (see README)
* Becasue of the data's large size, the Replication Package only includes a set of pseudo datasets

forvalues yy=2007/2016 {

	use "$path_raw/raw_hmda_`yy'.dta", clear // Pseudo data "$path_raw/raw_hmda_`yy'_pseudo.dta" is provided in /data/raw/
	
	keep year state_code county_code ctn loan_type loan_purpose property_type occupancy loan_amount action_type ///
	applicant_ethnicity applicant_race1 coapplicant_ethnicity coapplicant1_race1 applicant_sex coapplicant_sex applicant_income population ///
	denial_reason1 denial_reason2 denial_reason3 respondent_id agency
	
	rename agency agencycode
					
	drop if county_code=="NA "
	drop if ctn==""
	replace ctn=state_code+county_code+ctn
	replace ctn=subinstr(ctn,".","",.)
	
	recast str2 state_code
	recast str3 county_code
	recast str12 ctn

	format state_code %2s
	format county_code %3s
	format ctn %12s
	
	order year state_code county_code ctn

	keep if loan_purpose==1 | loan_purpose==3 // home purchase | refinancing
	keep if property_type==1 // One-to-four family
	
	drop if action_type>=4
	
	gen loan_reject=0
	replace loan_reject=1 if action_type==3
	
	gen male=0
	replace male=. if applicant_sex==3 | applicant_sex==4 // not provided
	replace male=1 if applicant_sex==1 & coapplicant_sex==5 // one applicant
	replace male=1 if applicant_sex==1 & coapplicant_sex==1 // two applicants, both male
	replace male=0.5 if applicant_sex==1 & coapplicant_sex!=5 & coapplicant_sex==2 // two applicants, one male + one female
	replace male=0.5 if applicant_sex==2 & coapplicant_sex!=5 & coapplicant_sex==1 // two applicants, one female + one male
	
	gen white=0
	replace white=. if applicant_race1==6 | applicant_race1==7 // not provided
	replace white=1 if applicant_race1==5 & coapplicant1_race1==8 // one applicant
	replace white=1 if applicant_race1==5 & coapplicant1_race1==5 // two applicants, both white
	replace white=0.5 if applicant_race1==5 & coapplicant1_race1!=8 & coapplicant1_race1<5 // two applicants, one white + one non-white
	replace white=0.5 if applicant_race1<5 & coapplicant1_race1!=8 & coapplicant1_race1==5 // two applicants, one non-white + one white
	
	destring applicant_income, gen(income) force
	* in thousands
	
	gen dti=loan_amount/income
	
	destring state_code, gen(state_num)
	gen FIPS=state_code+county_code
	destring FIPS, gen(county_num)
	
	keep year ctn white male income loan_amount dti loan_reject occupancy loan_purpose loan_type property_type ///
	respondent_id agencycode state_num county_num
	
	save "$path/temp_hmda_`yy'.dta", replace
}


clear
forvalues yy=2007/2016 {
	append using "$path/temp_hmda_`yy'.dta"
	erase "$path/temp_hmda_`yy'.dta"
}

order ctn year
sort ctn year
save "$path_raw/hmda_loanlevel_all.dta", replace

* The above code produces the analysis HMDA dataset at the loan level
* In the replication package, pseudo data is provided in "$path_raw/hmda_loanlevel_all_pseudo.dta"





